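# Count how many interaction records each item has in the patent-100k dataset
# (tab-separated text, item id in the second column), rank the items by that
# count from highest to lowest, and write the ranking to item2user.txt.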

from collections import Counter

import numpy as np
from tqdm import tqdm

path = "F:\\LCW\\PycharmProject\\pytorch-learn\\RecSys\\dataset\\patent-100k\\patent-100k.txt"


def count_item_interactions(lines):
    """Count interaction records per item and rank items by that count.

    Each element of *lines* is one tab-separated record whose second field
    is the item id; any further fields are ignored.

    Args:
        lines: Iterable of record strings, with or without a trailing
            line terminator.

    Returns:
        A list of ``(item, count)`` tuples sorted by count, highest first.
        Items with equal counts keep the order in which they first appeared.

    Raises:
        IndexError: If a non-empty line has no second tab-separated field.
    """
    counts = Counter()
    for line in lines:
        # Drop the line terminator first: in a two-column file the item id is
        # the last field and would otherwise carry a trailing "\n", making
        # "id\n" and "id" (final line without newline) two different items.
        record = line.rstrip("\r\n")
        if not record:
            # Blank lines (e.g. a trailing empty line) carry no record.
            continue
        counts[record.split("\t")[1]] += 1
    # most_common() sorts by count in descending order and is stable for ties.
    return counts.most_common()


def write_item_counts(ranking, output_path='item2user.txt'):
    """Write one ``item<TAB>count`` line per entry of *ranking*.

    Args:
        ranking: Iterable of ``(item, count)`` pairs, written in the given
            order.
        output_path: Destination file. It is truncated first so that running
            the script again replaces the ranking instead of appending a
            duplicate copy to it.
    """
    with open(output_path, 'w') as out:
        out.writelines(f"{item}\t{count}\n" for item, count in ranking)


# Only run the job when executed as a script, so the helpers above can be
# imported without touching the hard-coded dataset path.
if __name__ == "__main__":
    # Read the whole file up front so tqdm can show a total.
    with open(path, 'r') as f:
        data = f.readlines()

    # Rank items by their number of interactions, from highest to lowest.
    item_usercount = count_item_interactions(tqdm(data))
    print(item_usercount)
    print(type(item_usercount))

    write_item_counts(item_usercount, 'item2user.txt')